*-------------------------------------------------------------------------------*
* DIRECTORIES
*-------------------------------------------------------------------------------*
* Positional arguments: 1 = data root, 2 = code directory, 3 = figure/table output directory
args l_direc_data l_direc_code l_direc_figtab

* Raw inputs and cleaned datasets are sub-folders of the data root
local direc_data_raw   "`l_direc_data'/raw"
local direc_data_clean "`l_direc_data'/clean"



*---------------------------------------------------------------------------------*
* SUM STAT TABLE: A
*---------------------------------------------------------------------------------*
* EDITORS PER JOURNAL-YEAR (head count and mean tenure)
* Skeleton: one row per (journal_name, journal_nlmid, year) present in the base data
use "`direc_data_clean'/mjt_base.dta", clear
keep journal_name journal_nlmid year
gduplicates drop
tempfile Tjy
save `Tjy', replace

* Editor spells from the raw crosswalk; rows without an author_id are discarded
import delimited "`direc_data_raw'/author_editor_crosswalk.csv", clear
keep author_id journal_nlmid editor_startdate editor_enddate
drop if author_id == ""

* Pair each spell with every year of its journal, then keep only years inside the spell
joinby journal_nlmid using `Tjy'
drop if !(year >= editor_startdate & year <= editor_enddate)

* Years elapsed since the editor's start date
gen year_of_tenure = year - editor_startdate

order journal_name journal_nlmid author_id
sort journal_name journal_nlmid author_id

* Collapse to journal-year: numed = number of editor rows, year_of_tenure = their mean tenure
gen numed = 1
gcollapse (sum) numed (mean) year_of_tenure, by(journal_name journal_nlmid year)

* Three journals are excluded by NLM ID
drop if inlist(journal_nlmid, "8303128", "2985191R", "0410462")

* Saved for reuse by the publication and pubs-per-editor steps below
tempfile Tedjyear
save `Tedjyear', replace

* PUBLICATIONS PER JOURNAL-YEAR
preserve
	* Build a journal-title -> NLM ID lookup from the raw journal list
	import delimited "`direc_data_raw'/journal_list.csv", varnames(1) clear
	rename nlmid journal_nlmid
	* Prepend the zeros that the "leading" column says are missing from the ID
	replace journal_nlmid = "0" + journal_nlmid if strpos(leading, "should have 0") > 0
	replace journal_nlmid = "00" + journal_nlmid if strpos(leading, "should have two 0") > 0
	keep journal journal_nlmid
	* Replace ".", ",", ":" and the (case-sensitive) string "the" with a blank,
	* then collapse repeated blanks, trim the ends and upper-case the title
	foreach l_tok in . , : the {
		replace journal = subinstr(journal, "`l_tok'", " ", .)
	}
	replace journal = upper(itrim(trim(journal)))
	gduplicates drop
	compress
	tempfile T_j
	save `T_j', replace

	* Articles published 1950-2008, matched to an NLM ID through the cleaned title
	use "`direc_data_clean'/pmid_info.dta" if inrange(pmid_year, 1950, 2008), clear
	joinby journal using `T_j'
	drop journal
	rename pmid_year year

	* Restrict to journal-years that appear in the editor panel
	joinby journal_nlmid year using `Tedjyear'

	* One row per article, so the sum of ones is the article count
	gen numpub = 1
	gcollapse (sum) numpub, by(journal_nlmid year)
	tempfile Tnumpub
	save `Tnumpub', replace
restore

* Attach the counts to the editor panel, keeping unmatched panel rows, and report the match split
joinby journal_nlmid year using `Tnumpub', unmatched(master)
tab _merge
drop _merge

* PUBS PER EDITORS
preserve
	* Balanced editor x year grid covering 1950-2008 inclusive (59 years).
	* The grid is built per editor with expand, so it always starts at 1950 and
	* always reaches 2008 regardless of how many distinct editors there are.
	* (Numbering rows as _n+1950 and calling fillin started the grid at 1951 and
	* relied on there being enough editor rows to reach 2008.)
	use "`direc_data_clean'/editor_info.dta", clear
	keep author_id
	gduplicates drop
	expand 59
	bysort author_id: gen year = 1949 + _n

	* Attach each editor's journal spells and keep the years inside a spell
	joinby author_id using "`direc_data_clean'/editor_info.dta"
	gcollapse (min) editor_startyear (max) editor_endyear , by(author_id journal_nlmid year)
	keep if year >= editor_startyear & year <= editor_endyear

	* Sum eshr2_frcnt_all over the editors serving each journal-year
	joinby author_id journal_nlmid using "`direc_data_clean'/e_shares_ejm.dta"
	gcollapse (sum) eshr2_frcnt_all, by(journal_nlmid year)

	* Divide by the editor head count (numed) from the editor panel
	joinby journal_nlmid year using `Tedjyear'
	gen pubpered = eshr2_frcnt_all/numed

	keep pubpered journal_nlmid year
	tempfile T
	save `T', replace
restore

* Attach to the panel, keeping unmatched panel rows, and report the match split
joinby journal_nlmid year using `T', unmatched(master)
tab _m
drop _m

* UNIQUE MESH PER YEAR: PUBS
preserve
	use "`direc_data_clean'/mjt_regready_agg050.dta", clear

	* Flag rows with a strictly positive journal share. Stata treats missing as
	* larger than any number, so the missing check keeps missing shares from
	* being counted as positive.
	gen anypubsinmesh = (jshr_frcnt_all > 0 & !missing(jshr_frcnt_all))

	* Denominator: the largest value of fe_m
	* NOTE(review): assumes fe_m is a consecutive 1..N index of MeSH terms -- confirm
	su fe_m
	local l_totmesh = `r(max)'

	* Number of flagged rows per journal-year, as a share of the denominator
	gcollapse (sum) anypubsinmesh, by(journal_nlmid year)
	gen jshareoftree = anypubsinmesh/`l_totmesh'

	keep jshareoftree journal_nlmid year
	tempfile T
	save `T', replace
restore

* Attach to the panel, keeping unmatched panel rows, and report the match split
joinby journal_nlmid year using `T', unmatched(master)
tab _m
drop _m

* UNIQUE MESH PER YEAR: EDS
preserve
	use "`direc_data_clean'/mjt_regready_agg050.dta", clear

	* Flag rows with a strictly positive editor share. Stata treats missing as
	* larger than any number, so the missing check keeps missing shares from
	* being counted as positive.
	gen anyedsinmesh = (eshr2_frcnt_all > 0 & !missing(eshr2_frcnt_all))

	* Denominator: the largest value of fe_m
	* NOTE(review): assumes fe_m is a consecutive 1..N index of MeSH terms -- confirm
	su fe_m
	local l_totmesh = `r(max)'

	* Number of flagged rows per journal-year, as a share of the denominator
	gcollapse (sum) anyedsinmesh, by(journal_nlmid year)
	gen edshareoftree = anyedsinmesh/`l_totmesh'

	keep edshareoftree journal_nlmid year
	tempfile T
	save `T', replace
restore

* Attach to the panel, keeping unmatched panel rows, and report the match split
joinby journal_nlmid year using `T', unmatched(master)
tab _m
drop _m

* Sample for the table: journal-years with a publication count, 1985 onward
keep if numpub != . & year >= 1985

* Detailed summary statistics, written out as a LaTeX table
eststo clear
estpost summarize year numed year_of_tenure pubpered edshareoftree numpub jshareoftree, detail
esttab using "`l_direc_figtab'/tab_sumstat_1.tex", replace ///
	cells("count mean sd min p25 p50 p75 max") ///
	nonumber noobs
eststo clear


	
*---------------------------------------------------------------------------------*
* SUM STAT TABLE: B
*---------------------------------------------------------------------------------*
use "`direc_data_clean'/mjt_regready_agg050.dta", clear

foreach l_var of varlist jshr_frcnt_all eshr2_frcnt_all {
	* A_*: 1 if the share is strictly positive, 0 otherwise. Left missing when the
	* share is missing, because Stata would otherwise evaluate missing > 0 as true.
	gen A_`l_var' = `l_var' > 0 if !missing(`l_var')
	* NZ_*: the share itself, defined only where it is positive and non-missing
	gen NZ_`l_var' = `l_var' if `l_var' > 0 & !missing(`l_var')
}

* Drop rows whose journal_name contains "Rest of PubMed"
keep if !regexm(journal_name, "(Rest of PubMed)")

* Same sample window as table A
keep if year >= 1985

* Detailed summary statistics, written out as a LaTeX table
eststo clear
estpost su A_jshr_frcnt_all NZ_jshr_frcnt_all A_eshr2_frcnt_all NZ_eshr2_frcnt_all,d
esttab using "`l_direc_figtab'/tab_sumstat_2.tex", nonumber ///
	cells("count mean sd min p25 p50 p75 max") noobs ///
	replace
eststo clear

	

*---------------------------------------------------------------------------------*
* MESH TREND FIG
*---------------------------------------------------------------------------------*
* Total journal share per MeSH term and year
use "`direc_data_clean'/mjt_regready_agg100.dta", clear
gcollapse (sum) jshr_frcnt_all , by(year mesh_term mesh_tree)

* Assign terms to topic groups by pattern. Rules run top to bottom, so a term
* matching several patterns ends up in the last matching group; unmatched terms keep "".
gen mesh = ""
replace mesh = "aids" if regexm(mesh_term, "(human immunodeficiency virus|acquired immunodeficiency syndrome|acute retroviral syndrome|^aids|^hiv)")
replace mesh = "mRNA" if regexm(mesh_term,"(rna, messenger|mrna|^codon|^riboswitch|^rna caps|^rna splice site|^untranslated regions)")
replace mesh = "csec" if regexm(mesh_term,"(cesarean)")
replace mesh = "cmpsim" if regexm(mesh_term,"(computer simulation)")
replace mesh = "cmpcare" if regexm(mesh_term,"(computer-assisted)")
replace mesh = "rep" if regexm(mesh_term,"(^reproducibility|^data accuracy|^dimensional measurement accuracy)")
replace mesh = "bcancer" if regexm(mesh_term,"(^breast cyst|breast neoplasms|carcinoma, ductal, breast|breast self-examination)")
replace mesh = "sickle" if regexm(mesh_term,"(sickle)")
replace mesh = "kidney" if regexm(mesh_term,"(kidney|dialysis)")
tab mesh, missing

preserve
	keep if inrange(year, 1975, 2008)

	* Group-year totals, then each group's share of the year's total over all groups
	* (the untagged "" group is part of the denominator)
	gcollapse (sum) jshr_frcnt_all, by(mesh year)
	egen ysum_jshr_frcnt_all = total(jshr_frcnt_all), by(year)
	gen Y = jshr_frcnt_all / ysum_jshr_frcnt_all

	* Plot order: 1 = HIV/AIDS, 2 = Reproducibility, 3 = Cesarean, 4 = mRNA, 5 = Kidney
	twoway	(line Y year if mesh=="aids" & year>= 1982, lwidth(vthick) lpattern(solid) lcolor(gs2)) ///
		(line Y year if mesh=="rep" & year>= 1984, lwidth(vthick) lpattern(vshortdash) lcolor(gs4)) ///
		(line Y year if mesh=="csec" , lwidth(vthick) lpattern(dash_dot) lcolor(gs6)) ///
		(line Y year if mesh=="mRNA" , lwidth(thick) lpattern(longdash) lcolor(gs8)) ///
		(line Y year if mesh=="kidney" , lwidth(thick) lpattern(solid) lcolor(gs10)) ///
		if year >1977, ///
		legend(order(2 "Reproducibility" 1 "HIV/AIDS" 4 "mRNA"  5 "Kidneys & dialysis"  3 "Cesarean section" ) region(lcolor(white)) cols(1) position(3)) ///
		xtitle("") xlabel(1980 1985 1990 1995 2000 2005, grid gmax gmin) ///
		ytitle("Share of" "PubMed", orientation(horizontal)) ylabel(0 .002 .004 .006, format(%5.3fc) angle(horizontal) grid gmax gmin) ///
		graphregion(color(white) lcolor(white)) bgcolor(white) ///
		xsize(10) ysize(5) scale(1.25)

	* Full figure with all five lines
	graph export "`l_direc_figtab'/fig_meshtrend.pdf", as(pdf) replace

	* Build-up versions: hide one more line at each step (plots 2, 4, 5, 1, 3)
	* and export; the file suffix is the number of lines still visible
	local l_visible = 4
	foreach l_plot in 2 4 5 1 3 {
		gr_edit .plotregion1.plot`l_plot'.draw_view.setstyle, style(no)
		graph export "`l_direc_figtab'/fig_meshtrend_`l_visible'.pdf", as(pdf) replace
		local --l_visible
	}

restore



*---------------------------------------------------------------------------------*
* SUM STAT TABLE: APPENDIX
*---------------------------------------------------------------------------------*
* EDITORS PER JOURNAL: ACTIVE YEAR RANGE AND HEAD COUNT
* Journal-year skeleton from the base data, without the three excluded journals
use "`direc_data_clean'/mjt_base.dta", clear
keep journal_name journal_nlmid year
drop if inlist(journal_nlmid, "8303128", "2985191R", "0410462")
gduplicates drop
tempfile Tjy
save `Tjy', replace

* Editor spells from the raw crosswalk; rows without an author_id are discarded
import delimited "`direc_data_raw'/author_editor_crosswalk.csv", clear
keep author_id journal_nlmid editor_startdate editor_enddate
drop if author_id == ""

* Pair each spell with every year of its journal, then keep only years inside the spell
joinby journal_nlmid using `Tjy'
drop if !(year >= editor_startdate & year <= editor_enddate)

order journal_name journal_nlmid author_id
sort journal_name journal_nlmid author_id

* Editor head count per journal-year
gen numed = 1
gcollapse (sum) numed, by(journal_name journal_nlmid year)

* Journal-year panel, reused by the two steps below
tempfile Tedperyear
save `Tedperyear', replace

* One row per journal: first and last year with editors, mean and SD of the yearly head count
gcollapse (min) year_min=year (max) year_max=year (mean) numed_mea=numed (sd) numed_sd=numed, ///
	by(journal_name journal_nlmid)


* PUBLICATIONS PER JOURNAL-YEAR, SUMMARISED BY JOURNAL
preserve
	* Build a journal-title -> NLM ID lookup from the raw journal list
	import delimited "`direc_data_raw'/journal_list.csv", varnames(1) clear
	rename nlmid journal_nlmid
	* Prepend the zeros that the "leading" column says are missing from the ID
	replace journal_nlmid = "0" + journal_nlmid if strpos(leading, "should have 0") > 0
	replace journal_nlmid = "00" + journal_nlmid if strpos(leading, "should have two 0") > 0
	keep journal journal_nlmid
	* Replace ".", ",", ":" and the (case-sensitive) string "the" with a blank,
	* then collapse repeated blanks, trim the ends and upper-case the title
	foreach l_tok in . , : the {
		replace journal = subinstr(journal, "`l_tok'", " ", .)
	}
	replace journal = upper(itrim(trim(journal)))
	gduplicates drop
	compress
	tempfile T_j
	save `T_j', replace

	* Articles published 1950-2008, matched to an NLM ID through the cleaned title
	use "`direc_data_clean'/pmid_info.dta" if inrange(pmid_year, 1950, 2008), clear
	joinby journal using `T_j'
	drop journal
	rename pmid_year year

	* Restrict to journal-years that appear in the editor panel
	joinby journal_nlmid year using `Tedperyear'

	* Article count per journal-year, then its mean and SD across years within journal
	gen numpub = 1
	gcollapse (sum) numpub, by(journal_nlmid year)
	gcollapse (mean) numpub_mea=numpub (sd) numpub_sd=numpub, by(journal_nlmid)
	tempfile Tnumpub
	save `Tnumpub', replace
restore

* Attach to the journal table, keeping unmatched journals, and report the match split
joinby journal_nlmid using `Tnumpub', unmatched(master)
tab _merge
drop _merge
	
	
* PUBS PER EDITORS
preserve
	* Balanced editor x year grid covering 1950-2008 inclusive (59 years).
	* The grid is built per editor with expand, so it always starts at 1950 and
	* always reaches 2008 regardless of how many distinct editors there are.
	* (Numbering rows as _n+1950 and calling fillin started the grid at 1951 and
	* relied on there being enough editor rows to reach 2008.)
	use "`direc_data_clean'/editor_info.dta", clear
	keep author_id
	gduplicates drop
	expand 59
	bysort author_id: gen year = 1949 + _n

	* Attach each editor's journal spells and keep the years inside a spell
	joinby author_id using "`direc_data_clean'/editor_info.dta"
	gcollapse (min) editor_startyear (max) editor_endyear , by(author_id journal_nlmid year)
	keep if year >= editor_startyear & year <= editor_endyear

	* Sum eshr2_frcnt_all over the editors serving each journal-year
	joinby author_id journal_nlmid using "`direc_data_clean'/e_shares_ejm.dta"
	gcollapse (sum) eshr2_frcnt_all, by(journal_nlmid year)

	* Divide by the editor head count (numed) from the journal-year panel
	joinby journal_nlmid year using `Tedperyear'
	gen pubpered = eshr2_frcnt_all/numed

	* Mean and SD across years, one row per journal name
	gcollapse (mean) pubpered_mea=pubpered (sd) pubpered_sd=pubpered, by(journal_name)
	tempfile T
	save `T', replace
restore

* Attach to the journal table, keeping unmatched journals, and report the match split
joinby journal_name using `T', unmatched(master)
tab _m
drop _m


* TABLE
order journal_nlmid journal_name year_min year_max numed_mea numed_sd ///
	pubpered_mea pubpered_sd numpub_mea numpub_sd 

* Round to roughly three significant digits: one decimal above 10,
* two decimals between 1 and 10, three decimals below 1
foreach l_var of varlist *_mea *_sd {
	replace `l_var' = round(`l_var', 0.1) if `l_var' > 10
	replace `l_var' = round(`l_var', 0.01) if `l_var' > 1 & `l_var' < 10
	replace `l_var' = round(`l_var', 0.001) if `l_var' < 1
}

* Standard deviations are shown as text in parentheses
foreach l_var of varlist *_sd {
	tostring `l_var', replace force
	* Keep the first four characters, but never cut into the integer part:
	* with five or more integer digits keep the whole integer part instead
	replace `l_var' = substr(`l_var', 1, 4) if inrange(strpos(`l_var', "."), 1, 5)
	replace `l_var' = substr(`l_var', 1, strpos(`l_var', ".") - 1) if strpos(`l_var', ".") > 5
	* Remove a dangling decimal point (for example 153. from 153.2); this also
	* empties cells that held a missing value, which tostring writes as a lone point
	replace `l_var' = substr(`l_var', 1, strlen(`l_var') - 1) if substr(`l_var', -1, 1) == "."
	* Empty cells stay empty instead of becoming a pair of parentheses
	replace `l_var' = "(" + `l_var' + ")" if `l_var' != ""
}

* Italicise journal names in the LaTeX output
replace journal_name = "\emph{" + journal_name + "}"

texsave journal_name year_min year_max numed_mea numed_sd ///
	pubpered_mea pubpered_sd numpub_mea numpub_sd  ///
	using "`l_direc_figtab'/tab_sumstat_app.tex", replace


